This figure shows how/if different phenotypes of differentiated and undifferentiated cells of BE are maintained in cancer.
library("edgeR")
library("ggplot2")
library("pheatmap")
library("openxlsx")
library("RColorBrewer")
library("reshape2")
library("ggpubr")
library("knitr")
library("gridExtra")
# library('biomaRt')
files.dir <- "~/Dropbox/Postdoc/2019-12-29_BE2020/RNA-seq/"
files.out <- "~/Dropbox/Postdoc/2019-12-29_BE2020/Figures/"
anno_col <- list(Project.ID = c(Gastric = "#4DBBD5FF", Duodenum = "#3C5488FF", `Barrett's` = "#00A087FF",
`ICGC-ESAD` = "#D18118FF", Squamous = "darkred"), Tissue = c(NG = "#4DBBD5FF",
NE = "dark red", BE = "#00A087FF", ND = "#3C5488FF", SMG = "#B09C85FF"), RP.BarrettsAdjacent = c(ND = "grey",
yes = "#E0B400FF", no = "#00DCAFFF"))
breaksListZ = seq(-2, 2, by = 0.05)
Let’s begin by loading all of the data
# EAC and normal data (tmp, linear scale)
bulk.data <- readRDS(paste0(files.dir, "/EAC-all/count_codingGenes_clean.rds"))
################# use this coded to download the annotation and store re in the genesID.rds
################# ensembl = useMart('ensembl') ensembl = useDataset('hsapiens_gene_ensembl', mart
################# = ensembl) genesID<-getBM(attributes = c('hgnc_symbol', 'ensembl_gene_id',
################# 'gene_biotype'), mart = ensembl) saveRDS(genesID, file = paste0(files.dir,
################# '/Figure_5C/genesID.rds'))
genesID <- readRDS(paste0(files.dir, "/genesID.rds"))
# remove duplicated entries (keep the first gene on the list in)
genesID2 <- genesID[genesID$hgnc_symbol %in% rownames(bulk.data), ]
genesID2 <- genesID2[!(duplicated(genesID2$hgnc_symbol)), ]
genesID2 <- genesID2[!(duplicated(genesID2$ensembl_gene_id)), ]
bulk.data <- bulk.data[genesID2$hgnc_symbol, ]
rownames(bulk.data) <- genesID2$ensembl_gene_id
Let’s all all of the metadata
# get metadata
# get information about the samples from EAC
rowID <- readRDS(paste0(files.dir, "/EAC-all/samplesAnnotation_clean.rds"))
# #keep only data for sample group and clinical info about Barrett's next to
# cancer
colnames(rowID)[6] <- "Project.ID"
rowID$Project.ID[rowID$Project.ID == "EAC"] <- "ICGC-ESAD"
rowID$Project.ID[rowID$Project.ID == "Barretts"] <- "Barrett's"
rowID$Sample.Type[rowID$Project.ID == "ICGC-ESAD"] <- "Primary Tumor"
rowID$Sample.Type[rowID$Project.ID != "ICGC-ESAD"] <- "Solid Tissue Normal"
rowID$RP.BarrettsAdjacent[is.na(rowID$RP.BarrettsAdjacent)] <- "ND"
rowID$RP.BarrettsAdjacent[rowID$RP.BarrettsAdjacent == "unknown"] <- "ND"
rowID$primary_diagnosis[rowID$Project.ID == "ICGC-ESAD"] <- "Adenocarcinoma"
rowID$primary_diagnosis[rowID$Project.ID != "ICGC-ESAD"] <- rowID$Project.ID[rowID$Project.ID !=
"ICGC-ESAD"]
rowID$site_of_resection_or_biopsy <- "ND"
rowID$Case.ID <- row.names(rowID)
rowID$Sample.ID <- rowID$Case.ID
Let’s create edgeR object that will carry all of the information. I include all of the samples. From this object I will get normalized, batch corrected expression values
group <- factor(rowID$primary_diagnosis)
batch <- factor(rowID$batch)
y <- DGEList(bulk.data, group = group)
# #keep only genes with expression above 1 cpm in at least 3 samples
keep <- rowSums(edgeR::cpm(y) > 1) >= 3
y <- y[keep, , keep.lib.sizes = FALSE]
# get normalization factors
y <- calcNormFactors(y)
# Start performing diff expression - get sample desing model
design <- model.matrix(~batch + group)
# design <- model.matrix(~ 0 + group)
rownames(design) <- colnames(y)
# colnames(design) <- levels(group)
# Estimate disparsion
y <- estimateDisp(y, design, robust = TRUE)
plotBCV(y)
# Fit general linear model data
fit <- glmQLFit(y, design, robust = TRUE)
plotQLDisp(fit)
Let’s perform multidimension analysis of the data using building MDS function from edgeR and PC analysis.
# This can be used to get outliers
points <- c(0, 1, 2, 1, 15, 16, 17)
points <- rep(c(0, 1, 2), 3)
colors <- rep(c("blue", "darkgreen", "red", "black"), 3)
p <- limma::plotMDS(y, col = colors[group], pch = points[group], plot = FALSE)
p2 <- data.frame(p$x, p$y, .names = rowID$primary_diagnosis, .names2 = rowID$batch)
ggplot(data = p2) + geom_point(mapping = aes(x = p.x, y = p.y, colour = .names),
show.legend = TRUE) + theme_bw() + labs(x = paste0(p$axislabel, " 1"), y = paste0(p$axislabel,
" 2")) + theme(plot.title = element_text(hjust = 0.5)) + ggtitle("MDS analysis")
ggplot(data = p2) + geom_point(mapping = aes(x = p.x, y = p.y, colour = .names2),
show.legend = TRUE) + theme_bw() + labs(x = paste0(p$axislabel, " 1"), y = paste0(p$axislabel,
" 2")) + theme(plot.title = element_text(hjust = 0.5)) + ggtitle("MDS analysis")
For the analysis here I use 500 top HGV. All samples
# Create gene count matrix that will be used for expression display
geneMatrix <- edgeR::cpm(y, prior.count = 1, log = TRUE, normalized.lib.sizes = TRUE)
countVar <- apply(geneMatrix, 1, var)
highVar <- order(countVar, decreasing = TRUE)[1:500]
hmDat <- geneMatrix[highVar, ]
pca <- prcomp(t(hmDat), center = TRUE, scale. = TRUE)
loadings <- data.frame(pca$x, .names = rowID$primary_diagnosis, .names2 = rowID$batch)
ggplot(data = loadings) + geom_point(mapping = aes(x = PC1, y = PC2, colour = .names),
show.legend = TRUE) + theme_bw() + labs(x = paste0("PC1, ", round(summary(pca)$importance[2,
1] * 100, 2), "% variance"), y = paste0("PC2, ", round(summary(pca)$importance[2,
2] * 100, 2), "% variance")) + theme(plot.title = element_text(hjust = 0.5)) +
ggtitle("PCA - 500 HVGs")
ggplot(data = loadings) + geom_point(mapping = aes(x = PC1, y = PC2, colour = .names2),
show.legend = TRUE) + theme_bw() + labs(x = paste0("PC1, ", round(summary(pca)$importance[2,
1] * 100, 2), "% variance"), y = paste0("PC2, ", round(summary(pca)$importance[2,
2] * 100, 2), "% variance")) + theme(plot.title = element_text(hjust = 0.5)) +
ggtitle("PCA - 500 HVGs")
pheatmap(hmDat, annotation_col = rowID[, c(1, 4, 6)], clustering_method = "ward.D2",
main = "500 HVGs, all data", show_rownames = FALSE)
For the analysis here I use BE phenotype cell markers. Eso cancers and normal cells.
# read marker genes for BE and GC Here I am using data from table S9 that has
# specific marker genes for these tissue types
files.dir2 <- "~/Dropbox/Postdoc/2019-12-29_BE2020/Figures/Supplemental_tables/"
# load workbook with marker genes
tmp <- loadWorkbook(paste0(files.dir2, "Table_S9.xlsx"))
# create a list with markers
markers <- list()
# read the content of marker files
for (i in names(tmp)) {
markers[[i]] <- read.xlsx(tmp, sheet = i)
}
# read marker genes for endocrine cells
markers[["Endocrine"]] <- read.xlsx(loadWorkbook(paste0(files.dir2, "Table_S7.xlsx")),
sheet = "Endocrine_NEUROG3")
# Keep only markers that are over 1 logFC and FDR <0.1
markers[["Endocrine"]] <- markers[["Endocrine"]][apply(markers[["Endocrine"]][, 1:10],
1, min) > 1 & markers[["Endocrine"]]$FDR < 0.1, ]
# read marker genes for goblet cells
markers[["Goblet"]] <- read.xlsx(loadWorkbook(paste0(files.dir2, "Table_S7.xlsx")),
sheet = "Goblet")
# Keep only markers that are over 1 logFC and FDR <0.1
markers[["Goblet"]] <- markers[["Goblet"]][apply(markers[["Goblet"]][, 1:10], 1,
min) > 1 & markers[["Goblet"]]$FDR < 0.1, ]
Here, we perform DE between endrocrine cells of NG and BE
source("../Analysis/Functions/auxiliary.R")
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:gridExtra':
##
## combine
## The following object is masked from 'package:limma':
##
## plotMA
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
## Loading required package: BiocParallel
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following objects are masked from 'package:base':
##
## aperm, apply, rowsum
##
## Attaching package: 'SingleCellExperiment'
## The following object is masked from 'package:edgeR':
##
## cpm
##
## Attaching package: 'scater'
## The following object is masked from 'package:limma':
##
## plotMDS
##
## Attaching package: 'destiny'
## The following object is masked from 'package:SummarizedExperiment':
##
## distance
## The following object is masked from 'package:GenomicRanges':
##
## distance
## The following object is masked from 'package:IRanges':
##
## distance
# Read all scRNA-seq data
sce <- readRDS("~/Dropbox/Postdoc/2019-12-29_BE2020/All_corrected_sce_filtered.rds")
# Keep only data for NG and BE
cur_sce <- sce[,sce$include & sce$Tissue %in% c("NG", "BE")]# & sce$Patient != "Patient06"]
# Remove genes that are not expressed
cur_sce <- cur_sce[Matrix::rowSums(counts(cur_sce)) > 0,]
# DE within undiff cells tissue
sce_test <- cur_sce[,cur_sce$cell_type == "Endocrine"]
#Remove patients with fewer than 10 cells (removes 1 patients NG sample that had 1 cells)
sce_test$groups<-paste(sce_test$Patient, sce_test$Tissue, sce_test$cell_type, sep = "_")
sce_test <- sce_test[,sce_test$groups %in% names(table(sce_test$groups)[table(sce_test$groups)>=10])]
Endo_BE_vs_NG <- DE.edgeR(sce_test, conditions = sce_test$Tissue,
covariate = sce_test$Patient, lfc = 0.5,
FDR = 1)
[1] “Patient02 NG” “Patient13 NG” “Patient07 NG” “Patient03 NG” “Patient09 NG” [6] “Patient01 NG” “Patient10 NG” “Patient08 NG” “Patient07 BE” “Patient03 BE” [11] “Patient14 BE” “Patient09 BE”
Endo_BE_vs_NG <- rbind(Endo_BE_vs_NG$NG,
Endo_BE_vs_NG$BE[
seq(nrow(Endo_BE_vs_NG$BE), 1),])
Endo_BE_vs_NG$ID <- rownames(Endo_BE_vs_NG)
Endo_BE_vs_NG$specificity <- NA
Endo_BE_vs_NG$specificity[Endo_BE_vs_NG$logFC < 0 & Endo_BE_vs_NG$FDR <= 0.1] <- "NG"
Endo_BE_vs_NG$specificity[Endo_BE_vs_NG$logFC > 0 & Endo_BE_vs_NG$FDR <= 0.1] <- "BE"
# Identify BE specific genes
# Endocrine
# BE specific
cur_1 <- Endo_BE_vs_NG[Endo_BE_vs_NG$specificity == "BE" &
!is.na(Endo_BE_vs_NG$specificity),]
cur_2 <- markers[["Endocrine"]]
rownames(cur_2) <- cur_2$ID
shared.genes <- intersect(rownames(cur_1), rownames(cur_2))
BE_endo <- cbind(cur_1[shared.genes,], cur_2[shared.genes,])
BE_endo <- BE_endo[,c(1,5,9:32,8)]
colnames(BE_endo)[1:24] <- c(paste(colnames(BE_endo)[1:2], "BE_vs_NG", sep = "."),
paste("Endo_vs",colnames(BE_endo)[3:22], sep = "_"),
colnames(BE_endo)[23],
"Combined_vs_other_FDR")
# Order table
BE_endo <- BE_endo[order(rowMeans(BE_endo[,c(2,24)]), decreasing = FALSE),]
markers[["Endocrine2"]] <- BE_endo
write.xlsx(BE_endo, file = "~/Dropbox/Postdoc/2019-12-29_BE2020/Figures/Supplemental_tables/Table_S10.xlsx")
selection <- c("Barrett's", "Squamous", "Gastric", "Duodenum", "ICGC-ESAD")
hmDat <- geneMatrix[, rownames(rowID)[rowID$Project.ID %in% selection]]
#################################################################################### get marker genes for goblet cells and BE differentiated cells
iiii <- c("Goblet", "BE_diff_specific")
# iiii<-c( 'BE_diff_specific')
genes.list <- vector()
for (i in iiii) {
genes.list <- unique(c(genes.list, markers[[i]]$ID))
}
# plot z-score data
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
6), drop = FALSE], clustering_method = "ward.D2", main = "BE Diff markers CAM BE, GC, D2 and EAC - z-score",
show_rownames = FALSE, cluster_rows = T, scale = "row", breaks = breaksListZ,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(length(breaksListZ)),
show_colnames = FALSE, annotation_colors = anno_col) #,annotation_row = IDs)
# plot raw expression data
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
6), drop = FALSE], clustering_method = "ward.D2", main = "BE Diff markers CAM BE, GC, D2 and EAC",
show_rownames = FALSE, cluster_rows = TRUE, cutree_cols = 1, show_colnames = FALSE,
annotation_colors = anno_col) #,annotation_row = IDs)#, scale='row', breaks = breaksListZ,color = colorRampPalette(rev(brewer.pal(n = 7, name = 'RdBu')))(length(breaksListZ)))
########################################### Scaled to BE samples only########
# get the data for all important genes
hmDat2 <- hmDat[rownames(hmDat) %in% genes.list, ]
# scale the data around the mean expression in BE samples without any scaling
# factor. I use scale fuction to do it with center and scale arguments. Then
# calculate the median expression of scaled expression in each of the samples.
hmDat2 <- apply(apply(hmDat2, 1, function(x) scale(x, center = mean(x[rownames(rowID)[rowID$primary_diagnosis ==
"Barrett's"]]), scale = FALSE)), 1, median)
# fix the names
names(hmDat2) <- colnames(hmDat[rownames(hmDat) %in% genes.list, ])
# merge the median expression per sample with other clinical information
hmDat2 <- merge(rowID, hmDat2, by = 0, all.y = TRUE)
# remove cancer samples without Barrett's status
hmDat2 <- hmDat2[!(hmDat2$RP.BarrettsAdjacent == "ND" & hmDat2$Project.ID == "ICGC-ESAD"),
]
hmDat2$is.normal <- ifelse(hmDat2$Project.ID %in% c("Squamous", "Gastric", "Duodenum"),
"Normal", "NotNormal")
hmDat2$final <- paste0(hmDat2$Project.ID, hmDat2$is.normal, hmDat2$RP.BarrettsAdjacent)
hmDat2$colour <- paste0(hmDat2$is.normal, hmDat2$RP.BarrettsAdjacent)
# order the samples for the boxplots
hmDat2$Project.ID <- factor(hmDat2$Project.ID, levels = c("Squamous", "Gastric",
"Duodenum", "Barrett's", "ICGC-ESAD"))
hmDat2$RP.BarrettsAdjacent <- factor(hmDat2$RP.BarrettsAdjacent, levels = c("yes",
"no", "ND"))
hmDat2$RP.TumourGradingDifferentiationStatus <- factor(hmDat2$RP.TumourGradingDifferentiationStatus,
levels = c("poor", "moderate_to_poor", "moderate", "moderate_to_well", "well",
"unknown", NA))
hmDat2$final <- factor(hmDat2$final, levels = c("SquamousNormalND", "GastricNormalND",
"DuodenumNormalND", "Barrett'sNotNormalND", "ICGC-ESADNotNormalyes", "ICGC-ESADNotNormalno",
"ICGC-ESADNotNormalND"))
# plot all data without subgroups
p_all <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = Project.ID)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.75) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("All") + geom_point(position = position_jitterdodge(jitter.width = 0.5,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = Project.ID), size = 2.5) +
theme_bw() + ylab("Normalized expression") + xlab("Sample Type") + scale_fill_manual(values = anno_col$Project.ID) #c('#BBDCAFFF', '#BBDCAFFF', '#BBDCAFFF', '#00A087FF', '#E0B400FF'))
p_all <- p_all + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
p_all
# remove duodenum from the figure
hmDat3 <- hmDat2[hmDat2$primary_diagnosis != "Duodenum", ]
# plot all data without subgroups
p_all <- ggplot(hmDat3, aes(x = Project.ID, y = y, fill = Project.ID)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.75) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle(paste0("All without Duodenum ",
length(genes.list))) + geom_point(position = position_jitterdodge(jitter.width = 0.5,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = Project.ID), size = 2.5) +
theme_bw() + ylab("Normalized expression") + xlab("Sample Type") + scale_fill_manual(values = anno_col$Project.ID) #c('#BBDCAFFF', '#BBDCAFFF', '#BBDCAFFF', '#00A087FF', '#E0B400FF'))
p_all <- p_all + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(3,
4)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
p_all
p_diff <- p_all + theme(legend.position = "none")
p_all <- ggplot(hmDat3, aes(x = final, y = y, fill = Project.ID)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.75) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle(paste0("All without Duodenum ",
length(genes.list))) + geom_point(position = position_jitterdodge(jitter.width = 0.5,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = Project.ID), size = 2.5) +
theme_bw() + ylab("Normalized expression") + xlab("Sample Type") + scale_fill_manual(values = anno_col$Project.ID) #c('#BBDCAFFF', '#BBDCAFFF', '#BBDCAFFF', '#00A087FF', '#E0B400FF'))
p_all <- p_all + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5), c(3, 4), c(3, 5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01,
0.05, 1), symbols = c("****", "***", "**", "*", "ns")))
p_all
p_diff_BE <- p_all + theme(legend.position = "none")
# plot all data with cancers split according to Barrett's status
p <- ggplot(hmDat2, aes(x = final, y = y, fill = colour)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.8) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("Barrett's next to") +
geom_point(position = position_jitterdodge(jitter.width = 0.4, dodge.width = 1,
seed = 50014), pch = 21, aes(fill = colour), size = 2.5) + theme_bw() + ylab("Normalized expression") +
xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p <- p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(5,
6), c(4, 5), c(4, 6)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01,
0.05, 1), symbols = c("****", "***", "**", "*", "ns")))
p
ggsave(paste0(files.out, "./Figure_5/Figure_5C_Diff.pdf"), p_all, width = 12, height = 7,
useDingbats = FALSE)
kable(table(hmDat2$RP.BarrettsAdjacent[hmDat2$Project.ID == "ICGC-ESAD"]))
| Var1 | Freq |
|---|---|
| yes | 107 |
| no | 109 |
| ND | 0 |
# plot all data with cancers split according to Siewer Classification
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.SiewertClassification)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Siewert type") + geom_point(position = position_jitterdodge(jitter.width = 0.4,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.SiewertClassification),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
# plot all data with cancers split according to Tumour Differentiation Status
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.TumourGradingDifferentiationStatus)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Differentiation") + geom_point(position = position_jitterdodge(jitter.width = 0.05,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.TumourGradingDifferentiationStatus),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
# plot all data with cancers split according to Tumour stage
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.TStage.PrimaryTumour)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Stage") + geom_point(position = position_jitterdodge(jitter.width = 0.05,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.TStage.PrimaryTumour),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
6), drop = FALSE], clustering_method = "ward.D2", main = "BE Diff markers CAM BE, GC, D2 and EAC - z-score",
show_rownames = FALSE, cluster_rows = T, scale = "row", breaks = breaksListZ,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(length(breaksListZ)),
show_colnames = FALSE, annotation_colors = anno_col, file = paste0(files.out,
"./Figure_5/Figure_5C_Diff_heatmap.pdf"), width = 15)
# get marker genes for Endocrine cells
iiii <- c("Endocrine2")
genes.list <- vector()
for (i in iiii) {
genes.list <- unique(c(genes.list, markers[[i]]$ID))
}
# plot z-score data
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
6), drop = FALSE], clustering_method = "ward.D2", main = "BE Diff markers CAM BE, GC, D2 and EAC - z-score",
show_rownames = FALSE, cluster_rows = T, scale = "row", breaks = breaksListZ,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(length(breaksListZ)),
show_colnames = FALSE, annotation_colors = anno_col) #,annotation_row = IDs)
# plot raw expression data
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
6), drop = FALSE], clustering_method = "ward.D2", main = "BE Diff markers CAM BE, GC, D2 and EAC",
show_rownames = FALSE, cluster_rows = TRUE, cutree_cols = 1, show_colnames = FALSE,
annotation_colors = anno_col) #,annotation_row = IDs)#, scale='row', breaks = breaksListZ,color = colorRampPalette(rev(brewer.pal(n = 7, name = 'RdBu')))(length(breaksListZ)))
########################################### Scaled to BE samples only########
# get the data for all important genes
hmDat2 <- hmDat[rownames(hmDat) %in% genes.list, ]
# scale the data around the mean expression in BE samples without any scaling
# factor. I use scale fuction to do it with center and scale arguments. Then
# calculate the median expression of scaled expression in each of the samples.
hmDat2 <- apply(apply(hmDat2, 1, function(x) scale(x, center = mean(x[rownames(rowID)[rowID$primary_diagnosis ==
"Barrett's"]]), scale = FALSE)), 1, median)
# fix the names
names(hmDat2) <- colnames(hmDat[rownames(hmDat) %in% genes.list, ])
# merge the median expression per sample with other clinical information
hmDat2 <- merge(rowID, hmDat2, by = 0, all.y = TRUE)
# remove cancer samples without Barrett's status
hmDat2 <- hmDat2[!(hmDat2$RP.BarrettsAdjacent == "ND" & hmDat2$Project.ID == "ICGC-ESAD"),
]
hmDat2$is.normal <- ifelse(hmDat2$Project.ID %in% c("Squamous", "Gastric", "Duodenum"),
"Normal", "NotNormal")
hmDat2$final <- paste0(hmDat2$Project.ID, hmDat2$is.normal, hmDat2$RP.BarrettsAdjacent)
hmDat2$colour <- paste0(hmDat2$is.normal, hmDat2$RP.BarrettsAdjacent)
# order the samples for the boxplots
hmDat2$Project.ID <- factor(hmDat2$Project.ID, levels = c("Squamous", "Gastric",
"Duodenum", "Barrett's", "ICGC-ESAD"))
hmDat2$RP.BarrettsAdjacent <- factor(hmDat2$RP.BarrettsAdjacent, levels = c("yes",
"no", "ND"))
hmDat2$RP.TumourGradingDifferentiationStatus <- factor(hmDat2$RP.TumourGradingDifferentiationStatus,
levels = c("poor", "moderate_to_poor", "moderate", "moderate_to_well", "well",
"unknown", NA))
hmDat2$final <- factor(hmDat2$final, levels = c("SquamousNormalND", "GastricNormalND",
"DuodenumNormalND", "Barrett'sNotNormalND", "ICGC-ESADNotNormalyes", "ICGC-ESADNotNormalno",
"ICGC-ESADNotNormalND"))
# plot all data without subgroups
p_all <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = Project.ID)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.75) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("All") + geom_point(position = position_jitterdodge(jitter.width = 0.5,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = Project.ID), size = 2.5) +
theme_bw() + ylab("Normalized expression") + xlab("Sample Type") + scale_fill_manual(values = anno_col$Project.ID) #c('#BBDCAFFF', '#BBDCAFFF', '#BBDCAFFF', '#00A087FF', '#E0B400FF'))
p_all <- p_all + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
p_all
# remove duodenum from the figure
hmDat3 <- hmDat2[hmDat2$primary_diagnosis != "Duodenum", ]
# plot all data without subgroups
p_all <- ggplot(hmDat3, aes(x = Project.ID, y = y, fill = Project.ID)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.75) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle(paste0("All without Duodenum ",
length(genes.list))) + geom_point(position = position_jitterdodge(jitter.width = 0.5,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = Project.ID), size = 2.5) +
theme_bw() + ylab("Normalized expression") + xlab("Sample Type") + scale_fill_manual(values = anno_col$Project.ID) #c('#BBDCAFFF', '#BBDCAFFF', '#BBDCAFFF', '#00A087FF', '#E0B400FF'))
p_all <- p_all + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(3,
4)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
p_all
p_all <- ggplot(hmDat3, aes(x = final, y = y, fill = Project.ID)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.75) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle(paste0("All without Duodenum ",
length(genes.list))) + geom_point(position = position_jitterdodge(jitter.width = 0.5,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = Project.ID), size = 2.5) +
theme_bw() + ylab("Normalized expression") + xlab("Sample Type") + scale_fill_manual(values = anno_col$Project.ID) #c('#BBDCAFFF', '#BBDCAFFF', '#BBDCAFFF', '#00A087FF', '#E0B400FF'))
p_all <- p_all + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5), c(3, 4), c(3, 5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01,
0.05, 1), symbols = c("****", "***", "**", "*", "ns")))
p_all
p_endo_BE <- p_all + theme(legend.position = "none")
# plot all data with cancers split according to Barrett's status
p <- ggplot(hmDat2, aes(x = final, y = y, fill = colour)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.8) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("Barrett's next to") +
geom_point(position = position_jitterdodge(jitter.width = 0.4, dodge.width = 1,
seed = 50014), pch = 21, aes(fill = colour), size = 2.5) + theme_bw() + ylab("Normalized expression") +
xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p <- p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(5,
6), c(4, 5), c(4, 6)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01,
0.05, 1), symbols = c("****", "***", "**", "*", "ns")))
p
ggsave(paste0(files.out, "./Figure_5/Figure_5C_Endocrine.pdf"), p_all, width = 12,
height = 7, useDingbats = FALSE)
kable(table(hmDat2$RP.BarrettsAdjacent[hmDat2$Project.ID == "ICGC-ESAD"]))
| Var1 | Freq |
|---|---|
| yes | 107 |
| no | 109 |
| ND | 0 |
# plot all data with cancers split according to Siewer Classification
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.SiewertClassification)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Siewert type") + geom_point(position = position_jitterdodge(jitter.width = 0.4,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.SiewertClassification),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
# plot all data with cancers split according to Tumour Differentiation Status
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.TumourGradingDifferentiationStatus)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Differentiation") + geom_point(position = position_jitterdodge(jitter.width = 0.05,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.TumourGradingDifferentiationStatus),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
# plot all data with cancers split according to Tumour stage
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.TStage.PrimaryTumour)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Stage") + geom_point(position = position_jitterdodge(jitter.width = 0.05,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.TStage.PrimaryTumour),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
6), drop = FALSE], clustering_method = "ward.D2", main = "BE Endocrine markers CAM BE, NG, ND and EAC - z-score",
show_rownames = FALSE, cluster_rows = T, scale = "row", breaks = breaksListZ,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(length(breaksListZ)),
show_colnames = FALSE, annotation_colors = anno_col, file = paste0(files.out,
"/Figure_5/Figure_5C_Endocrine_heatmap.pdf"), width = 15)
# get marker genes for Endocrine cells
iiii <- c("Endocrine2", "BE_undiff_specific")
genes.list <- vector()
for (i in iiii) {
genes.list <- unique(c(genes.list, markers[[i]]$ID))
}
# plot z-score data
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
6), drop = FALSE], clustering_method = "ward.D2", main = "BE Diff markers CAM BE, GC, D2 and EAC - z-score",
show_rownames = FALSE, cluster_rows = T, scale = "row", breaks = breaksListZ,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(length(breaksListZ)),
show_colnames = FALSE, annotation_colors = anno_col) #,annotation_row = IDs)
# plot raw expression data
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
6), drop = FALSE], clustering_method = "ward.D2", main = "BE Diff markers CAM BE, GC, D2 and EAC",
show_rownames = FALSE, cluster_rows = TRUE, cutree_cols = 1, show_colnames = FALSE,
annotation_colors = anno_col) #,annotation_row = IDs)#, scale='row', breaks = breaksListZ,color = colorRampPalette(rev(brewer.pal(n = 7, name = 'RdBu')))(length(breaksListZ)))
########################################### Scaled to BE samples only########
# get the data for all important genes
hmDat2 <- hmDat[rownames(hmDat) %in% genes.list, ]
# scale the data around the mean expression in BE samples without any scaling
# factor. I use scale fuction to do it with center and scale arguments. Then
# calculate the median expression of scaled expression in each of the samples.
hmDat2 <- apply(apply(hmDat2, 1, function(x) scale(x, center = mean(x[rownames(rowID)[rowID$primary_diagnosis ==
"Barrett's"]]), scale = FALSE)), 1, median)
# fix the names
names(hmDat2) <- colnames(hmDat[rownames(hmDat) %in% genes.list, ])
# merge the median expression per sample with other clinical information
hmDat2 <- merge(rowID, hmDat2, by = 0, all.y = TRUE)
# remove cancer samples without Barrett's status
hmDat2 <- hmDat2[!(hmDat2$RP.BarrettsAdjacent == "ND" & hmDat2$Project.ID == "ICGC-ESAD"),
]
hmDat2$is.normal <- ifelse(hmDat2$Project.ID %in% c("Squamous", "Gastric", "Duodenum"),
"Normal", "NotNormal")
hmDat2$final <- paste0(hmDat2$Project.ID, hmDat2$is.normal, hmDat2$RP.BarrettsAdjacent)
hmDat2$colour <- paste0(hmDat2$is.normal, hmDat2$RP.BarrettsAdjacent)
# order the samples for the boxplots
hmDat2$Project.ID <- factor(hmDat2$Project.ID, levels = c("Squamous", "Gastric",
"Duodenum", "Barrett's", "ICGC-ESAD"))
hmDat2$RP.BarrettsAdjacent <- factor(hmDat2$RP.BarrettsAdjacent, levels = c("yes",
"no", "ND"))
hmDat2$RP.TumourGradingDifferentiationStatus <- factor(hmDat2$RP.TumourGradingDifferentiationStatus,
levels = c("poor", "moderate_to_poor", "moderate", "moderate_to_well", "well",
"unknown", NA))
hmDat2$final <- factor(hmDat2$final, levels = c("SquamousNormalND", "GastricNormalND",
"DuodenumNormalND", "Barrett'sNotNormalND", "ICGC-ESADNotNormalyes", "ICGC-ESADNotNormalno",
"ICGC-ESADNotNormalND"))
# plot all data without subgroups
p_all <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = Project.ID)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.75) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("All") + geom_point(position = position_jitterdodge(jitter.width = 0.5,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = Project.ID), size = 2.5) +
theme_bw() + ylab("Normalized expression") + xlab("Sample Type") + scale_fill_manual(values = anno_col$Project.ID) #c('#BBDCAFFF', '#BBDCAFFF', '#BBDCAFFF', '#00A087FF', '#E0B400FF'))
p_all <- p_all + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
p_all
# remove duodenum from the figure
hmDat3 <- hmDat2[hmDat2$primary_diagnosis != "Duodenum", ]
# plot all data without subgroups
p_all <- ggplot(hmDat3, aes(x = Project.ID, y = y, fill = Project.ID)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.75) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle(paste0("All without Duodenum",
length(genes.list))) + geom_point(position = position_jitterdodge(jitter.width = 0.5,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = Project.ID), size = 2.5) +
theme_bw() + ylab("Normalized expression") + xlab("Sample Type") + scale_fill_manual(values = anno_col$Project.ID) #c('#BBDCAFFF', '#BBDCAFFF', '#BBDCAFFF', '#00A087FF', '#E0B400FF'))
p_all <- p_all + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(3,
4)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
p_all
p_undiff <- p_all + theme(legend.position = "none")
p_all <- ggplot(hmDat3, aes(x = final, y = y, fill = Project.ID)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.75) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle(paste0("All without Duodenum ",
length(genes.list))) + geom_point(position = position_jitterdodge(jitter.width = 0.5,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = Project.ID), size = 2.5) +
theme_bw() + ylab("Normalized expression") + xlab("Sample Type") + scale_fill_manual(values = anno_col$Project.ID) #c('#BBDCAFFF', '#BBDCAFFF', '#BBDCAFFF', '#00A087FF', '#E0B400FF'))
p_all <- p_all + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5), c(3, 4), c(3, 5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01,
0.05, 1), symbols = c("****", "***", "**", "*", "ns")))
p_all
p_undiff_BE <- p_all + theme(legend.position = "none")
# plot all data with cancers split according to Barrett's status
p <- ggplot(hmDat2, aes(x = final, y = y, fill = colour)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.8) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("Barrett's next to") +
geom_point(position = position_jitterdodge(jitter.width = 0.4, dodge.width = 1,
seed = 50014), pch = 21, aes(fill = colour), size = 2.5) + theme_bw() + ylab("Normalized expression") +
xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p <- p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(5,
6), c(4, 5), c(4, 6)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01,
0.05, 1), symbols = c("****", "***", "**", "*", "ns")))
p
ggsave(paste0(files.out, "./Figure_5/Figure_5C_Undiff_Endocrine.pdf"), p_all, width = 12,
height = 7, useDingbats = FALSE)
all.figures <- grid.arrange(p_undiff, p_diff, nrow = 1)
ggsave(paste0(files.out, "./Figure_5/Figure_5C_Combined.pdf"), all.figures, width = 6,
height = 3, useDingbats = FALSE)
all.figures <- grid.arrange(p_undiff_BE, p_diff_BE, nrow = 1)
ggsave(paste0(files.out, "./Figure_5/Figure_5C_Combined_BE.pdf"), all.figures, width = 9,
height = 5, useDingbats = FALSE)
kable(table(hmDat2$RP.BarrettsAdjacent[hmDat2$Project.ID == "ICGC-ESAD"]))
| Var1 | Freq |
|---|---|
| yes | 107 |
| no | 109 |
| ND | 0 |
# plot all data with cancers split according to Siewer Classification
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.SiewertClassification)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Siewert type") + geom_point(position = position_jitterdodge(jitter.width = 0.4,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.SiewertClassification),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
# plot all data with cancers split according to Tumour Differentiation Status
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.TumourGradingDifferentiationStatus)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Differentiation") + geom_point(position = position_jitterdodge(jitter.width = 0.05,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.TumourGradingDifferentiationStatus),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
# plot all data with cancers split according to Tumour stage
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.TStage.PrimaryTumour)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Stage") + geom_point(position = position_jitterdodge(jitter.width = 0.05,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.TStage.PrimaryTumour),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
6), drop = FALSE], clustering_method = "ward.D2", main = "BE Endocrine markers CAM BE, NG, ND and EAC - z-score",
show_rownames = FALSE, cluster_rows = T, scale = "row", breaks = breaksListZ,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(length(breaksListZ)),
show_colnames = FALSE, annotation_colors = anno_col, file = paste0(files.out,
"/Figure_5/Figure_5C_Undiff_Endocrine_heatmap.pdf"), width = 15)
# get marker genes for Endocrine cells
iiii <- c("Endocrine")
genes.list <- vector()
for (i in iiii) {
genes.list <- unique(c(genes.list, markers[[i]]$ID))
}
# plot z-score data
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
6), drop = FALSE], clustering_method = "ward.D2", main = "BE Diff markers CAM BE, GC, D2 and EAC - z-score",
show_rownames = FALSE, cluster_rows = T, scale = "row", breaks = breaksListZ,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(length(breaksListZ)),
show_colnames = FALSE, annotation_colors = anno_col) #,annotation_row = IDs)
# plot raw expression data
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
6), drop = FALSE], clustering_method = "ward.D2", main = "BE Diff markers CAM BE, GC, D2 and EAC",
show_rownames = FALSE, cluster_rows = TRUE, cutree_cols = 1, show_colnames = FALSE,
annotation_colors = anno_col) #,annotation_row = IDs)#, scale='row', breaks = breaksListZ,color = colorRampPalette(rev(brewer.pal(n = 7, name = 'RdBu')))(length(breaksListZ)))
########################################### Scaled to BE samples only########
# get the data for all important genes
hmDat2 <- hmDat[rownames(hmDat) %in% genes.list, ]
# scale the data around the mean expression in BE samples without any scaling
# factor. I use scale fuction to do it with center and scale arguments. Then
# calculate the median expression of scaled expression in each of the samples.
hmDat2 <- apply(apply(hmDat2, 1, function(x) scale(x, center = mean(x[rownames(rowID)[rowID$primary_diagnosis ==
"Barrett's"]]), scale = FALSE)), 1, median)
# fix the names
names(hmDat2) <- colnames(hmDat[rownames(hmDat) %in% genes.list, ])
# merge the median expression per sample with other clinical information
hmDat2 <- merge(rowID, hmDat2, by = 0, all.y = TRUE)
# remove cancer samples without Barrett's status
hmDat2 <- hmDat2[!(hmDat2$RP.BarrettsAdjacent == "ND" & hmDat2$Project.ID == "ICGC-ESAD"),
]
hmDat2$is.normal <- ifelse(hmDat2$Project.ID %in% c("Squamous", "Gastric", "Duodenum"),
"Normal", "NotNormal")
hmDat2$final <- paste0(hmDat2$Project.ID, hmDat2$is.normal, hmDat2$RP.BarrettsAdjacent)
hmDat2$colour <- paste0(hmDat2$is.normal, hmDat2$RP.BarrettsAdjacent)
# order the samples for the boxplots
hmDat2$Project.ID <- factor(hmDat2$Project.ID, levels = c("Squamous", "Gastric",
"Duodenum", "Barrett's", "ICGC-ESAD"))
hmDat2$RP.BarrettsAdjacent <- factor(hmDat2$RP.BarrettsAdjacent, levels = c("yes",
"no", "ND"))
hmDat2$RP.TumourGradingDifferentiationStatus <- factor(hmDat2$RP.TumourGradingDifferentiationStatus,
levels = c("poor", "moderate_to_poor", "moderate", "moderate_to_well", "well",
"unknown", NA))
hmDat2$final <- factor(hmDat2$final, levels = c("SquamousNormalND", "GastricNormalND",
"DuodenumNormalND", "Barrett'sNotNormalND", "ICGC-ESADNotNormalyes", "ICGC-ESADNotNormalno",
"ICGC-ESADNotNormalND"))
# plot all data without subgroups
p_all <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = Project.ID)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.75) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("All") + geom_point(position = position_jitterdodge(jitter.width = 0.5,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = Project.ID), size = 2.5) +
theme_bw() + ylab("Normalized expression") + xlab("Sample Type") + scale_fill_manual(values = anno_col$Project.ID) #c('#BBDCAFFF', '#BBDCAFFF', '#BBDCAFFF', '#00A087FF', '#E0B400FF'))
p_all <- p_all + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
p_all
# remove duodenum from the figure
hmDat3 <- hmDat2[hmDat2$primary_diagnosis != "Duodenum", ]
# plot all data without subgroups
p_all <- ggplot(hmDat3, aes(x = Project.ID, y = y, fill = Project.ID)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.75) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("All without Duodenum") +
geom_point(position = position_jitterdodge(jitter.width = 0.5, dodge.width = 1,
seed = 50014), pch = 21, aes(fill = Project.ID), size = 2.5) + theme_bw() +
ylab("Normalized expression") + xlab("Sample Type") + scale_fill_manual(values = anno_col$Project.ID) #c('#BBDCAFFF', '#BBDCAFFF', '#BBDCAFFF', '#00A087FF', '#E0B400FF'))
p_all <- p_all + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(3,
4)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
p_all
# plot all data with cancers split according to Barrett's status
p <- ggplot(hmDat2, aes(x = final, y = y, fill = colour)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.8) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("Barrett's next to") +
geom_point(position = position_jitterdodge(jitter.width = 0.4, dodge.width = 1,
seed = 50014), pch = 21, aes(fill = colour), size = 2.5) + theme_bw() + ylab("Normalized expression") +
xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p <- p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(5,
6), c(4, 5), c(4, 6)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01,
0.05, 1), symbols = c("****", "***", "**", "*", "ns")))
p
ggsave(paste0(files.out, "./Figure_5/Figure_5C_Endocrine_all.pdf"), p_all, width = 12,
height = 7, useDingbats = FALSE)
kable(table(hmDat2$RP.BarrettsAdjacent[hmDat2$Project.ID == "ICGC-ESAD"]))
| Var1 | Freq |
|---|---|
| yes | 107 |
| no | 109 |
| ND | 0 |
# plot all data with cancers split according to Siewer Classification
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.SiewertClassification)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Siewert type") + geom_point(position = position_jitterdodge(jitter.width = 0.4,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.SiewertClassification),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
# plot all data with cancers split according to Tumour Differentiation Status
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.TumourGradingDifferentiationStatus)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Differentiation") + geom_point(position = position_jitterdodge(jitter.width = 0.05,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.TumourGradingDifferentiationStatus),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
# plot all data with cancers split according to Tumour stage
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.TStage.PrimaryTumour)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Stage") + geom_point(position = position_jitterdodge(jitter.width = 0.05,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.TStage.PrimaryTumour),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
6), drop = FALSE], clustering_method = "ward.D2", main = "BE Endocrine markers CAM BE, NG, ND and EAC - z-score",
show_rownames = FALSE, cluster_rows = T, scale = "row", breaks = breaksListZ,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(length(breaksListZ)),
show_colnames = FALSE, annotation_colors = anno_col, file = paste0(files.out,
"/Figure_5/Figure_5C_Endocrine_heatmap_all.pdf"), width = 15)
iiii <- c("BE_undiff_specific")
genes.list <- vector()
for (i in iiii) {
genes.list <- unique(c(genes.list, markers[[i]]$ID))
}
# plot z-score data
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
3, 4, 6), drop = FALSE], clustering_method = "ward.D2", main = "BE Undifferentiated markers CAM BE, GC, D2 and EAC - z-score",
show_rownames = FALSE, cluster_rows = T, scale = "row", breaks = breaksListZ,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(length(breaksListZ)),
show_colnames = FALSE, annotation_colors = anno_col)
# plot raw expression data
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
6), drop = FALSE], clustering_method = "ward.D2", main = "BE Undifferentiated markers CAM BE, GC, D2 and EAC",
show_rownames = FALSE, cluster_rows = TRUE, show_colnames = FALSE, annotation_colors = anno_col)
# , scale='row', breaks = breaksListZ,color = colorRampPalette(rev(brewer.pal(n =
# 7, name = 'RdBu')))(length(breaksListZ)))
########################################### Scaled to BE samples only########
# get the data for all important genes
hmDat2 <- hmDat[rownames(hmDat) %in% genes.list, ]
# scale the data around the mean expression in BE samples without any scaling
# factor. I use scale fuction to do it with center and scale arguments. Then
# calculate the median expression of scaled expression in each of the samples.
hmDat2 <- apply(apply(hmDat2, 1, function(x) scale(x, center = mean(x[rownames(rowID)[rowID$primary_diagnosis ==
"Barrett's"]]), scale = FALSE)), 1, median)
# fix the names
names(hmDat2) <- colnames(hmDat[rownames(hmDat) %in% genes.list, ])
# merge the median expression per sample with other clinical information
hmDat2 <- merge(rowID, hmDat2, by = 0, all.y = TRUE)
# remove cancer samples without Barrett's status
hmDat2 <- hmDat2[!(hmDat2$RP.BarrettsAdjacent == "ND" & hmDat2$Project.ID == "ICGC-ESAD"),
]
hmDat2$is.normal <- ifelse(hmDat2$Project.ID %in% c("Squamous", "Gastric", "Duodenum"),
"Normal", "NotNormal")
hmDat2$final <- paste0(hmDat2$Project.ID, hmDat2$is.normal, hmDat2$RP.BarrettsAdjacent)
hmDat2$colour <- paste0(hmDat2$is.normal, hmDat2$RP.BarrettsAdjacent)
# order the samples for the boxplots
hmDat2$Project.ID <- factor(hmDat2$Project.ID, levels = c("Squamous", "Gastric",
"Duodenum", "Barrett's", "ICGC-ESAD"))
hmDat2$RP.BarrettsAdjacent <- factor(hmDat2$RP.BarrettsAdjacent, levels = c("yes",
"no", "ND"))
hmDat2$RP.TumourGradingDifferentiationStatus <- factor(hmDat2$RP.TumourGradingDifferentiationStatus,
levels = c("poor", "moderate_to_poor", "moderate", "moderate_to_well", "well",
"unknown", NA))
hmDat2$final <- factor(hmDat2$final, levels = c("SquamousNormalND", "GastricNormalND",
"DuodenumNormalND", "Barrett'sNotNormalND", "ICGC-ESADNotNormalyes", "ICGC-ESADNotNormalno",
"ICGC-ESADNotNormalND"))
# plot all data without subgroups
p_all <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = Project.ID)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.75) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("All") + geom_point(position = position_jitterdodge(jitter.width = 0.5,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = Project.ID), size = 2.5) +
theme_bw() + ylab("Normalized expression") + xlab("Sample Type") + scale_fill_manual(values = anno_col$Project.ID) #c('#BBDCAFFF', '#BBDCAFFF', '#BBDCAFFF', '#00A087FF', '#E0B400FF'))
p_all <- p_all + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
p_all
# remove duodenum from the figure
hmDat3 <- hmDat2[hmDat2$primary_diagnosis != "Duodenum", ]
# plot all data without subgroups
p_all <- ggplot(hmDat3, aes(x = Project.ID, y = y, fill = Project.ID)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.75) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("All without Duodenum") +
geom_point(position = position_jitterdodge(jitter.width = 0.5, dodge.width = 1,
seed = 50014), pch = 21, aes(fill = Project.ID), size = 2.5) + theme_bw() +
ylab("Normalized expression") + xlab("Sample Type") + scale_fill_manual(values = anno_col$Project.ID) #c('#BBDCAFFF', '#BBDCAFFF', '#BBDCAFFF', '#00A087FF', '#E0B400FF'))
p_all <- p_all + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(3,
4)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
p_all
# plot all data with cancers split according to Barrett's status
p <- ggplot(hmDat2, aes(x = final, y = y, fill = colour)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.8) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("Barrett's next to") +
geom_point(position = position_jitterdodge(jitter.width = 0.4, dodge.width = 1,
seed = 50014), pch = 21, aes(fill = colour), size = 2.5) + theme_bw() + ylab("Normalized expression") +
xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p <- p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(5,
6), c(4, 5), c(4, 6)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01,
0.05, 1), symbols = c("****", "***", "**", "*", "ns")))
p
ggsave(paste0(files.out, "./Figure_5/Figure_5D_Undiff.pdf"), p_all, width = 12, height = 7,
useDingbats = FALSE)
# wilcox.test(hmDat2$value[hmDat2$RP.BarrettsAdjacent == 'no'],
# hmDat2$value[hmDat2$RP.BarrettsAdjacent == 'yes'])
# wilcox.test(hmDat2.1$value[hmDat2.1$RP.BarrettsAdjacent == 'no'],
# hmDat2.1$value[hmDat2.1$RP.BarrettsAdjacent == 'ND' & hmDat2.1$Project.ID ==
# 'ICGC-ESAD']) wilcox.test(hmDat2.1$value[hmDat2.1$RP.BarrettsAdjacent ==
# 'yes'], hmDat2.1$value[hmDat2.1$RP.BarrettsAdjacent == 'ND'&
# hmDat2.1$Project.ID == 'ICGC-ESAD'])
kable(table(hmDat2$RP.BarrettsAdjacent[hmDat2$Project.ID == "ICGC-ESAD"]))
| Var1 | Freq |
|---|---|
| yes | 107 |
| no | 109 |
| ND | 0 |
# plot all data with cancers split according to Siewer Classification
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.SiewertClassification)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Siewert type") + geom_point(position = position_jitterdodge(jitter.width = 0.4,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.SiewertClassification),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
# plot all data with cancers split according to Tumour Differentiation Status
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.TumourGradingDifferentiationStatus)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Differentiation") + geom_point(position = position_jitterdodge(jitter.width = 0.05,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.TumourGradingDifferentiationStatus),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
# plot all data with cancers split according to Tumour stage
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.TStage.PrimaryTumour)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Stage") + geom_point(position = position_jitterdodge(jitter.width = 0.05,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.TStage.PrimaryTumour),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
6), drop = FALSE], clustering_method = "ward.D2", main = "BE Undiff markers CAM BE, GC, D2 and EAC - z-score",
show_rownames = FALSE, cluster_rows = T, scale = "row", breaks = breaksListZ,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(length(breaksListZ)),
show_colnames = FALSE, annotation_colors = anno_col, file = paste0(files.out,
"/Figure_5/Figure_5D_Undiff_heatmap.pdf"), width = 15)
iiii <- c("NG_diff_specific")
genes.list <- vector()
for (i in iiii) {
genes.list <- unique(c(genes.list, markers[[i]]$ID))
}
# plot z-score data
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
3, 4, 6), drop = FALSE], clustering_method = "ward.D2", main = "BE Undifferentiated markers CAM BE, GC, D2 and EAC - z-score",
show_rownames = FALSE, cluster_rows = T, scale = "row", breaks = breaksListZ,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(length(breaksListZ)),
show_colnames = FALSE, annotation_colors = anno_col)
# plot raw expression data
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
6), drop = FALSE], clustering_method = "ward.D2", main = "BE Undifferentiated markers CAM BE, GC, D2 and EAC",
show_rownames = FALSE, cluster_rows = TRUE, show_colnames = FALSE, annotation_colors = anno_col)
# , scale='row', breaks = breaksListZ,color = colorRampPalette(rev(brewer.pal(n =
# 7, name = 'RdBu')))(length(breaksListZ)))
########################################### Scaled to GC samples only########
# get the data for all important genes
hmDat2 <- hmDat[rownames(hmDat) %in% genes.list, ]
# scale the data around the mean expression in NG samples without any scaling
# factor. I use scale fuction to do it with center and scale arguments. Then
# calculate the median expression of scaled expression in each of the samples.
hmDat2 <- apply(apply(hmDat2, 1, function(x) scale(x, center = mean(x[rownames(rowID)[rowID$primary_diagnosis ==
"Gastric"]]), scale = FALSE)), 1, median)
# fix the names
names(hmDat2) <- colnames(hmDat[rownames(hmDat) %in% genes.list, ])
# merge the median expression per sample with other clinical information
hmDat2 <- merge(rowID, hmDat2, by = 0, all.y = TRUE)
# remove cancer samples without Barrett's status
hmDat2 <- hmDat2[!(hmDat2$RP.BarrettsAdjacent == "ND" & hmDat2$Project.ID == "ICGC-ESAD"),
]
hmDat2$is.normal <- ifelse(hmDat2$Project.ID %in% c("Squamous", "Gastric", "Duodenum"),
"Normal", "NotNormal")
hmDat2$final <- paste0(hmDat2$Project.ID, hmDat2$is.normal, hmDat2$RP.BarrettsAdjacent)
hmDat2$colour <- paste0(hmDat2$is.normal, hmDat2$RP.BarrettsAdjacent)
# order the samples for the boxplots
hmDat2$Project.ID <- factor(hmDat2$Project.ID, levels = c("Squamous", "Gastric",
"Duodenum", "Barrett's", "ICGC-ESAD"))
hmDat2$RP.BarrettsAdjacent <- factor(hmDat2$RP.BarrettsAdjacent, levels = c("yes",
"no", "ND"))
hmDat2$RP.TumourGradingDifferentiationStatus <- factor(hmDat2$RP.TumourGradingDifferentiationStatus,
levels = c("poor", "moderate_to_poor", "moderate", "moderate_to_well", "well",
"unknown", NA))
hmDat2$final <- factor(hmDat2$final, levels = c("SquamousNormalND", "GastricNormalND",
"DuodenumNormalND", "Barrett'sNotNormalND", "ICGC-ESADNotNormalyes", "ICGC-ESADNotNormalno",
"ICGC-ESADNotNormalND"))
# plot all data without subgroups
p_all <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = Project.ID)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.75) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("All") + geom_point(position = position_jitterdodge(jitter.width = 0.5,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = Project.ID), size = 2.5) +
theme_bw() + ylab("Normalized expression") + xlab("Sample Type") + scale_fill_manual(values = anno_col$Project.ID) #c('#BBDCAFFF', '#BBDCAFFF', '#BBDCAFFF', '#00A087FF', '#E0B400FF'))
p_all <- p_all + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(2,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
p_all
# remove duodenum from the figure
hmDat3 <- hmDat2[hmDat2$primary_diagnosis != "Duodenum", ]
# plot all data without subgroups
p_all <- ggplot(hmDat3, aes(x = Project.ID, y = y, fill = Project.ID)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.75) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("All without Duodenum") +
geom_point(position = position_jitterdodge(jitter.width = 0.5, dodge.width = 1,
seed = 50014), pch = 21, aes(fill = Project.ID), size = 2.5) + theme_bw() +
ylab("Normalized expression") + xlab("Sample Type") + scale_fill_manual(values = anno_col$Project.ID) #c('#BBDCAFFF', '#BBDCAFFF', '#BBDCAFFF', '#00A087FF', '#E0B400FF'))
p_all <- p_all + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(2,
4)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
p_all
# plot all data with cancers split according to Barrett's status
p <- ggplot(hmDat2, aes(x = final, y = y, fill = colour)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.8) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("Barrett's next to") +
geom_point(position = position_jitterdodge(jitter.width = 0.4, dodge.width = 1,
seed = 50014), pch = 21, aes(fill = colour), size = 2.5) + theme_bw() + ylab("Normalized expression") +
xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p <- p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(5,
6), c(2, 5), c(2, 6)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01,
0.05, 1), symbols = c("****", "***", "**", "*", "ns")))
p
ggsave(paste0(files.out, "./Supplementary_figures/S18/Figure_S18A_NG_Diff.pdf"),
p_all, width = 12, height = 7, useDingbats = FALSE)
p_all <- ggplot(hmDat3, aes(x = final, y = y, fill = Project.ID)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.75) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle(paste0("All without Duodenum ",
length(genes.list))) + geom_point(position = position_jitterdodge(jitter.width = 0.5,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = Project.ID), size = 2.5) +
theme_bw() + ylab("Normalized expression") + xlab("Sample Type") + scale_fill_manual(values = anno_col$Project.ID) #c('#BBDCAFFF', '#BBDCAFFF', '#BBDCAFFF', '#00A087FF', '#E0B400FF'))
p_all <- p_all + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5), c(2, 4), c(2, 5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01,
0.05, 1), symbols = c("****", "***", "**", "*", "ns")))
p_all
p_all <- p_all + theme(legend.position = "none")
ggsave(paste0(files.out, "./Supplementary_figures/S18/Figure_S18A_NG_Diff_BE.pdf"),
p_all, width = 12, height = 7, useDingbats = FALSE)
kable(table(hmDat2$RP.BarrettsAdjacent[hmDat2$Project.ID == "ICGC-ESAD"]))
| Var1 | Freq |
|---|---|
| yes | 107 |
| no | 109 |
| ND | 0 |
# plot all data with cancers split according to Siewer Classification
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.SiewertClassification)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Siewert type") + geom_point(position = position_jitterdodge(jitter.width = 0.4,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.SiewertClassification),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(2,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
# plot all data with cancers split according to Tumour Differentiation Status
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.TumourGradingDifferentiationStatus)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Differentiation") + geom_point(position = position_jitterdodge(jitter.width = 0.05,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.TumourGradingDifferentiationStatus),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(2,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
# plot all data with cancers split according to Tumour stage
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.TStage.PrimaryTumour)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Stage") + geom_point(position = position_jitterdodge(jitter.width = 0.05,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.TStage.PrimaryTumour),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(2,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
6), drop = FALSE], clustering_method = "ward.D2", main = "NG Diff markers CAM BE, GC, D2 and EAC - z-score",
show_rownames = FALSE, cluster_rows = T, scale = "row", breaks = breaksListZ,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(length(breaksListZ)),
show_colnames = FALSE, annotation_colors = anno_col, file = paste0(files.out,
"/Supplementary_figures/S18/Figure_S18A_NG_Diff_heatmap.pdf"), width = 15)
iiii <- c("NG_undiff_specific")
genes.list <- vector()
for (i in iiii) {
genes.list <- unique(c(genes.list, markers[[i]]$ID))
}
# plot z-score data
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
3, 4, 6), drop = FALSE], clustering_method = "ward.D2", main = "BE Undifferentiated markers CAM BE, GC, D2 and EAC - z-score",
show_rownames = FALSE, cluster_rows = T, scale = "row", breaks = breaksListZ,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(length(breaksListZ)),
show_colnames = FALSE, annotation_colors = anno_col)
# plot raw expression data
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
6), drop = FALSE], clustering_method = "ward.D2", main = "BE Undifferentiated markers CAM BE, GC, D2 and EAC",
show_rownames = FALSE, cluster_rows = TRUE, show_colnames = FALSE, annotation_colors = anno_col)
# , scale='row', breaks = breaksListZ,color = colorRampPalette(rev(brewer.pal(n =
# 7, name = 'RdBu')))(length(breaksListZ)))
########################################### Scaled to GC samples only########
# get the data for all important genes
hmDat2 <- hmDat[rownames(hmDat) %in% genes.list, ]
# scale the data around the mean expression in NG samples without any scaling
# factor. I use scale fuction to do it with center and scale arguments. Then
# calculate the median expression of scaled expression in each of the samples.
hmDat2 <- apply(apply(hmDat2, 1, function(x) scale(x, center = mean(x[rownames(rowID)[rowID$primary_diagnosis ==
"Gastric"]]), scale = FALSE)), 1, median)
# fix the names
names(hmDat2) <- colnames(hmDat[rownames(hmDat) %in% genes.list, ])
# merge the median expression per sample with other clinical information
hmDat2 <- merge(rowID, hmDat2, by = 0, all.y = TRUE)
# remove cancer samples without Barrett's status
hmDat2 <- hmDat2[!(hmDat2$RP.BarrettsAdjacent == "ND" & hmDat2$Project.ID == "ICGC-ESAD"),
]
hmDat2$is.normal <- ifelse(hmDat2$Project.ID %in% c("Squamous", "Gastric", "Duodenum"),
"Normal", "NotNormal")
hmDat2$final <- paste0(hmDat2$Project.ID, hmDat2$is.normal, hmDat2$RP.BarrettsAdjacent)
hmDat2$colour <- paste0(hmDat2$is.normal, hmDat2$RP.BarrettsAdjacent)
# order the samples for the boxplots
hmDat2$Project.ID <- factor(hmDat2$Project.ID, levels = c("Squamous", "Gastric",
"Duodenum", "Barrett's", "ICGC-ESAD"))
hmDat2$RP.BarrettsAdjacent <- factor(hmDat2$RP.BarrettsAdjacent, levels = c("yes",
"no", "ND"))
hmDat2$RP.TumourGradingDifferentiationStatus <- factor(hmDat2$RP.TumourGradingDifferentiationStatus,
levels = c("poor", "moderate_to_poor", "moderate", "moderate_to_well", "well",
"unknown", NA))
hmDat2$final <- factor(hmDat2$final, levels = c("SquamousNormalND", "GastricNormalND",
"DuodenumNormalND", "Barrett'sNotNormalND", "ICGC-ESADNotNormalyes", "ICGC-ESADNotNormalno",
"ICGC-ESADNotNormalND"))
# plot all data without subgroups
p_all <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = Project.ID)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.75) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("All") + geom_point(position = position_jitterdodge(jitter.width = 0.5,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = Project.ID), size = 2.5) +
theme_bw() + ylab("Normalized expression") + xlab("Sample Type") + scale_fill_manual(values = anno_col$Project.ID) #c('#BBDCAFFF', '#BBDCAFFF', '#BBDCAFFF', '#00A087FF', '#E0B400FF'))
p_all <- p_all + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(2,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
p_all
# remove duodenum from the figure
hmDat3 <- hmDat2[hmDat2$primary_diagnosis != "Duodenum", ]
# plot all data without subgroups
p_all <- ggplot(hmDat3, aes(x = Project.ID, y = y, fill = Project.ID)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.75) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("All without Duodenum") +
geom_point(position = position_jitterdodge(jitter.width = 0.5, dodge.width = 1,
seed = 50014), pch = 21, aes(fill = Project.ID), size = 2.5) + theme_bw() +
ylab("Normalized expression") + xlab("Sample Type") + scale_fill_manual(values = anno_col$Project.ID) #c('#BBDCAFFF', '#BBDCAFFF', '#BBDCAFFF', '#00A087FF', '#E0B400FF'))
p_all <- p_all + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(2,
4)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
p_all
# plot all data with cancers split according to Barrett's status
p <- ggplot(hmDat2, aes(x = final, y = y, fill = colour)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.8) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("Barrett's next to") +
geom_point(position = position_jitterdodge(jitter.width = 0.4, dodge.width = 1,
seed = 50014), pch = 21, aes(fill = colour), size = 2.5) + theme_bw() + ylab("Normalized expression") +
xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p <- p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(5,
6), c(2, 5), c(2, 6)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01,
0.05, 1), symbols = c("****", "***", "**", "*", "ns")))
p
ggsave(paste0(files.out, "./Supplementary_figures/S18/Figure_S18B_NG_Undiff.pdf"),
p_all, width = 12, height = 7, useDingbats = FALSE)
p_all <- ggplot(hmDat3, aes(x = final, y = y, fill = Project.ID)) + geom_boxplot(position = position_dodge2(1,
preserve = "single"), outlier.alpha = 0, width = 0.75) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle(paste0("All without Duodenum ",
length(genes.list))) + geom_point(position = position_jitterdodge(jitter.width = 0.5,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = Project.ID), size = 2.5) +
theme_bw() + ylab("Normalized expression") + xlab("Sample Type") + scale_fill_manual(values = anno_col$Project.ID) #c('#BBDCAFFF', '#BBDCAFFF', '#BBDCAFFF', '#00A087FF', '#E0B400FF'))
p_all <- p_all + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(4,
5), c(2, 4), c(2, 5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01,
0.05, 1), symbols = c("****", "***", "**", "*", "ns")))
p_all
p_all <- p_all + theme(legend.position = "none")
ggsave(paste0(files.out, "./Supplementary_figures/S18/Figure_S18A_NG_Undiff_BE.pdf"),
p_all, width = 12, height = 7, useDingbats = FALSE)
# wilcox.test(hmDat2$value[hmDat2$RP.BarrettsAdjacent == 'no'],
# hmDat2$value[hmDat2$RP.BarrettsAdjacent == 'yes'])
# wilcox.test(hmDat2.1$value[hmDat2.1$RP.BarrettsAdjacent == 'no'],
# hmDat2.1$value[hmDat2.1$RP.BarrettsAdjacent == 'ND' & hmDat2.1$Project.ID ==
# 'ICGC-ESAD']) wilcox.test(hmDat2.1$value[hmDat2.1$RP.BarrettsAdjacent ==
# 'yes'], hmDat2.1$value[hmDat2.1$RP.BarrettsAdjacent == 'ND'&
# hmDat2.1$Project.ID == 'ICGC-ESAD'])
kable(table(hmDat2$RP.BarrettsAdjacent[hmDat2$Project.ID == "ICGC-ESAD"]))
| Var1 | Freq |
|---|---|
| yes | 107 |
| no | 109 |
| ND | 0 |
# plot all data with cancers split according to Siewer Classification
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.SiewertClassification)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Siewert type") + geom_point(position = position_jitterdodge(jitter.width = 0.4,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.SiewertClassification),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(2,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
# plot all data with cancers split according to Tumour Differentiation Status
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.TumourGradingDifferentiationStatus)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Differentiation") + geom_point(position = position_jitterdodge(jitter.width = 0.05,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.TumourGradingDifferentiationStatus),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(2,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
# plot all data with cancers split according to Tumour stage
p <- ggplot(hmDat2, aes(x = Project.ID, y = y, fill = RP.TStage.PrimaryTumour)) +
geom_boxplot(position = position_dodge2(1, preserve = "single"), outlier.alpha = 0,
width = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Stage") + geom_point(position = position_jitterdodge(jitter.width = 0.05,
dodge.width = 1, seed = 50014), pch = 21, aes(fill = RP.TStage.PrimaryTumour),
size = 2.5) + theme_bw() + ylab("Normalized expression") + xlab("Sample Type")
# p+stat_compare_means(method = 'wilcox.test', paired = FALSE, comparisons =
# rev(list(c('Barrett's', 'ICGC-ESAD'),c('Barrett's', 'Gastric'),c('Barrett's',
# 'Duodenum'), c('no', 'yes'))) )
p + stat_compare_means(method = "wilcox.test", paired = FALSE, comparisons = list(c(2,
5)), symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 1), symbols = c("****",
"***", "**", "*", "ns")))
pheatmap(hmDat[rownames(hmDat) %in% genes.list, ], annotation_col = rowID[, c(1,
6), drop = FALSE], clustering_method = "ward.D2", main = "NG Undiff markers CAM BE, GC, D2 and EAC - z-score",
show_rownames = FALSE, cluster_rows = T, scale = "row", breaks = breaksListZ,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(length(breaksListZ)),
show_colnames = FALSE, annotation_colors = anno_col, file = paste0(files.out,
"/Supplementary_figures/S18/Figure_S18B_NG_Undiff_heatmap.pdf"), width = 15)
To finish get session info: R version 3.6.2 (2019-12-12) Platform: x86_64-redhat-linux-gnu (64-bit) Running under: Fedora 31 (Workstation Edition)
Matrix products: default BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
locale: [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
[5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
[7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
attached base packages: [1] parallel stats4 stats graphics grDevices utils datasets [8] methods base
other attached packages: [1] destiny_3.0.1 dbscan_1.1-5
[3] princurve_2.1.4 dynamicTreeCut_1.63-1
[5] scran_1.14.6 scater_1.14.6
[7] SingleCellExperiment_1.8.0 SummarizedExperiment_1.16.1 [9] DelayedArray_0.12.2 BiocParallel_1.20.1
[11] matrixStats_0.55.0 Biobase_2.46.0
[13] GenomicRanges_1.38.0 GenomeInfoDb_1.22.0
[15] IRanges_2.20.2 S4Vectors_0.24.3
[17] BiocGenerics_0.32.0 gridExtra_2.3
[19] knitr_1.28 ggpubr_0.2.5
[21] magrittr_1.5 reshape2_1.4.3
[23] RColorBrewer_1.1-2 openxlsx_4.1.4
[25] pheatmap_1.0.12 ggplot2_3.2.1
[27] edgeR_3.28.0 limma_3.42.2
[29] rmarkdown_2.1
loaded via a namespace (and not attached): [1] ggbeeswarm_0.6.0 colorspace_1.4-1 RcppEigen_0.3.3.7.0
[4] ggsignif_0.6.0 class_7.3-15 rio_0.5.16
[7] XVector_0.26.0 RcppHNSW_0.2.0 BiocNeighbors_1.4.1
[10] proxy_0.4-23 hexbin_1.28.1 farver_2.0.3
[13] RSpectra_0.16-0 ranger_0.12.1 codetools_0.2-16
[16] splines_3.6.2 robustbase_0.93-5 compiler_3.6.2
[19] ggplot.multistats_1.0.0 dqrng_0.2.1 assertthat_0.2.1
[22] Matrix_1.2-18 lazyeval_0.2.2 BiocSingular_1.2.1
[25] formatR_1.7 htmltools_0.4.0 tools_3.6.2
[28] rsvd_1.0.2 igraph_1.2.4.2 gtable_0.3.0
[31] glue_1.3.1 GenomeInfoDbData_1.2.2 dplyr_0.8.4
[34] ggthemes_4.2.0 Rcpp_1.0.3 carData_3.0-3
[37] cellranger_1.1.0 vctrs_0.2.2 DelayedMatrixStats_1.8.0 [40] lmtest_0.9-37 xfun_0.12 laeken_0.5.1
[43] stringr_1.4.0 lifecycle_0.1.0 irlba_2.3.3
[46] statmod_1.4.33 DEoptimR_1.0-8 zoo_1.8-7
[49] zlibbioc_1.32.0 MASS_7.3-51.4 scales_1.1.0
[52] VIM_5.1.0 pcaMethods_1.78.0 hms_0.5.3
[55] yaml_2.2.1 curl_4.3 stringi_1.4.5
[58] highr_0.8 e1071_1.7-3 TTR_0.23-6
[61] boot_1.3-23 zip_2.0.4 rlang_0.4.4
[64] pkgconfig_2.0.3 bitops_1.0-6 evaluate_0.14
[67] lattice_0.20-38 purrr_0.3.3 labeling_0.3
[70] tidyselect_1.0.0 plyr_1.8.5 R6_2.4.1
[73] pillar_1.4.3 haven_2.2.0 foreign_0.8-72
[76] withr_2.1.2 xts_0.12-0 scatterplot3d_0.3-41
[79] abind_1.4-5 RCurl_1.98-1.1 sp_1.3-2
[82] nnet_7.3-12 tibble_2.1.3 crayon_1.3.4
[85] car_3.0-6 viridis_0.5.1 locfit_1.5-9.1
[88] grid_3.6.2 readxl_1.3.1 data.table_1.12.8
[91] forcats_0.4.0 vcd_1.4-5 digest_0.6.24
[94] tidyr_1.0.2 munsell_0.5.0 beeswarm_0.2.3
[97] viridisLite_0.3.0 smoother_1.1 vipor_0.4.5